### Load packages
options(scipen = 999)
library(data.table)
library(magrittr)
source('~/Documents/R/Utils/functions/feature_comparison.R')
source('~/Documents/R/Utils/functions/psi.R')
source('~/Documents/R/Utils/functions/helper.R')

### Input data & parameters
# input: a data.table / list of data.tables with features of interest
input <- fread("~/Documents/R/churn/WA_Fn-UseC_-Telco-Customer-Churn.csv")
# Split the rows on the Churn column; these two subsets are the groups whose
# feature distributions are compared below.
inputGood <- input[Churn == "No"]
inputBad <- input[Churn == "Yes"]
# featureList: names of the features whose distributions are compared between
# the two churn groups.
# A column is kept when it has at most 35 unique values OR is numeric, i.e.
# only non-numeric columns with more than 35 unique values are dropped.
hasFewLevels <- vapply(input, uniqueN, numeric(1L)) <= 35
isNumericCol <- vapply(input, is.numeric, logical(1L))
featureList <- names(input)[hasFewLevels | isNumericCol]
# customerID and the Churn split column are excluded from the feature list.
featureList <- setdiff(featureList, c("customerID", "Churn"))
  • Last updated on 2018-09-08
  • The features customerID and Churn are excluded from the feature distributions

# PSI of every feature in featureList, computed by PSI() with the "No" churn
# group as oldScore and the "Yes" churn group as newScore.
# NOTE(review): no variable named `quantile` is assigned in the visible part
# of this script, so unless one of the sourced files defines one,
# `quantile = quantile` passes the stats::quantile function itself. Confirm
# what PSI() expects for this argument; the numeric features report a PSI of
# -1 in the rendered output, which may be related -- TODO confirm.
psi <- purrr::map(featureList,
                  function(feature) PSI(oldScore = inputGood[[feature]],
                                        newScore = inputBad[[feature]],
                                        dataNames = c("No", "Yes"),
                                        quantile = quantile,
                                        showVis = FALSE))
# First element of each PSI() result as a plain numeric vector; used both for
# the per-feature header and for ordering the report sections.
psiValues <- purrr::map_dbl(psi, 1)
# One report section per feature: title, PSI value, comparison output, rule.
# seq_along() keeps this a no-op when featureList is empty, and `[[` selects
# the column by name without evaluating the name inside the data.table scope.
plot <- lapply(seq_along(featureList),
               function(i) {
                 feature <- featureList[i]
                 list(htmltools::tags$h3(feature),
                      htmltools::tags$h5(paste0("PSI: ", psiValues[i])),
                      FeatureComparison(input = list(NoChurn = inputGood[[feature]],
                                                     Churn = inputBad[[feature]]),
                                        vectorName = feature),
                      htmltools::tags$hr())
               })
# Sections sorted by descending PSI. rev(order()) is kept on purpose: it also
# reverses the order of tied features, unlike order(decreasing = TRUE).
htmltools::tagList(plot[rev(order(psiValues))])

Contract

PSI: 1.19

OnlineSecurity

PSI: 0.77

TechSupport

PSI: 0.75

InternetService

PSI: 0.61

DeviceProtection

PSI: 0.54

OnlineBackup

PSI: 0.51

PaymentMethod

PSI: 0.46

StreamingMovies

PSI: 0.41

StreamingTV

PSI: 0.38

PaperlessBilling

PSI: 0.2

Dependents

PSI: 0.15

Partner

PSI: 0.1

SeniorCitizen

PSI: 0.09

MultipleLines

PSI: 0

PhoneService

PSI: 0

gender

PSI: 0

TotalCharges

PSI: -1

MonthlyCharges

PSI: -1

tenure

PSI: -1